#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Author:张广勤
@Web site: https://www.tunan.wang
@Github:www.github.com
 
@File:html2txt3_0.py
@Time:2024/9/4 22:16

@Motto:不积跬步无以至千里，不积小流无以成江海！
"""

import html2text

# Sample HTML document used as the conversion input. It includes a heading,
# a paragraph with inline emphasis, and a list with one non-ASCII (Chinese)
# item to show how such text comes through the conversion.
html_content = """
<html>
  <body>
    <h1>Welcome to My Website</h1>
    <p>This is a <strong>sample</strong> paragraph.</p >
    <ul>
      <li>中文可以吗</li>
      <li>Item 2</li>
    </ul>
  </body>
</html>
"""

# Build a converter with html2text's default settings, run it over the
# sample document, and write the resulting text to stdout.
converter = html2text.HTML2Text()
rendered = converter.handle(html_content)
print(rendered)
